library(foreign)                        #read.spss (not used)
library(memisc)                         #spss.portable.file
library(plyr)                           #ldply
library(stringr)                        #str_sub, str_pad

the_prefix <- "" # your replication material directory

## Switch to the replication directory once it has been filled in above.
## setwd() has no default for its `dir` argument, so calling it bare is an
## error; with an empty prefix the current working directory is kept.
if (nzchar(the_prefix)) {
    setwd(the_prefix)
}

read_dir <- "./data" # where you have placed the original KFF files

## Candidate survey files: names in read_dir containing "hni" (any case),
## excluding any name that matches "txt" or "pdf".
the_files <- grep(
    "txt|pdf",
    grep("hni", list.files(read_dir), value = TRUE, ignore.case = TRUE),
    invert = TRUE,
    value = TRUE
)

## Pattern used to find the helped/hurt items in the variable descriptions.
the_grep <- "negatively affected|benefited from"

## Read every survey file and reduce it to a common set of columns.
## `data` is a list keyed by file name. SPSS portable files that have no "VB"
## column whose description matches `the_grep` are dropped. Every retained
## entry ends up with the columns DATE, WEIGHT, PSRAID, survey, helped, hurt,
## helped_vb and hurt_vb.
data <- list()
for (the_file in c(grep("por", the_files, value = TRUE), "hni123.csv")) {
    is_csv <- the_file == "hni123.csv"
    ## read_dir carries no trailing separator, so join the parts with file.path()
    the_path <- file.path(read_dir, the_file)
    ## read SPSS file (hni123 is read from its CSV version instead)
    if (is_csv) {
        data[[the_file]] <- read.csv(the_path)
    } else {
        data[[the_file]] <- as.data.set(spss.portable.file(the_path))
    }
    ## make all column names upper case
    names(data[[the_file]]) <- toupper(names(data[[the_file]]))
    ## positions of the columns that came from the file itself, recorded before
    ## any derived column is appended below
    orig_names <- seq_along(names(data[[the_file]]))
    if (!is_csv) {
        descriptions <- sapply(data[[the_file]], description)
        ## skip files without a matching "VB" item
        if (!any(grepl(the_grep, descriptions) & grepl("VB", names(data[[the_file]])))) {
            data[[the_file]] <- NULL
            next
        }
    }
    ## no weight in this file
    if (the_file == "hni077.por") {
        data[[the_file]]$WEIGHT <- data[[the_file]]$WEIGHT2
    }
    if (the_file == "hni162.por") {
        data[[the_file]]$WEIGHT <- data[[the_file]]$WEIGHT1
    }
    ##
    print(the_file)
    ## this is for assigning years to dates: strip the known name decorations
    ## (same patterns, same order as before) and keep the survey number
    file_stem <- gsub("^p20[0-9][0-9]", "", the_file)
    file_stem <- gsub("hni", "", file_stem)
    file_stem <- gsub("uspsra2015-", "", file_stem)
    file_stem <- gsub(".por", "", file_stem)
    file_stem <- gsub("-august", "", file_stem)
    file_stem <- gsub(".csv", "", file_stem)
    file_number <- as.numeric(file_stem)
    print(file_number)
    ##
    ## weird date formats
    if ("INT_DATE" %in% names(data[[the_file]])) {
        ## INT_DATE is parsed as month-day-year after left-padding to 6 characters
        data[[the_file]]$DATE <- as.Date(
            str_pad(data[[the_file]]$INT_DATE, width = 6, pad = "0"),
            format = "%m%d%y")
    } else if ("INTVWDAT" %in% names(data[[the_file]])) {
        ## INTVWDAT is parsed as year-month-day
        data[[the_file]]$DATE <- as.Date(
            data[[the_file]]$INTVWDAT,
            format = "%y%m%d")
    } else if ("DATE" %in% names(data[[the_file]])) {
        ## DATE is treated as MMDD, so the year is looked up from the survey
        ## number: < 83 -> 2009, < 95 -> 2010, < 106 -> 2011, < 117 -> 2012,
        ## < 126 -> 2013. Numbers outside the table (or NA) give NA instead of
        ## the "argument is missing" error of an ifelse() without a `no` value.
        year_breaks <- c(83, 95, 106, 117, 126)
        year_values <- c(2009, 2010, 2011, 2012, 2013, NA)
        the_year <- year_values[findInterval(file_number, year_breaks) + 1]
        if (is.na(the_year)) {
            warning("no year mapping for ", the_file, "; DATE will be NA",
                    call. = FALSE)
        }
        month_day <- str_pad(data[[the_file]]$DATE, width = 4, pad = "0")
        data[[the_file]]$DATE <- as.Date(
            paste(
                the_year,
                ## month
                str_sub(month_day, 1, 2),
                ## day
                str_sub(month_day, -2, -1),
                sep = "-"
            ),
            format = "%Y-%m-%d")
    }
    ## IDs
    if ("ID" %in% names(data[[the_file]])) {
        data[[the_file]]$PSRAID <- data[[the_file]]$ID
    }
    data[[the_file]]$DATE <- as.character(data[[the_file]]$DATE)
    ## survey number
    data[[the_file]]$survey <- file_number
    if (!is_csv) {
        ## Convert the memisc data set to a plain data frame: character items
        ## become character, plain character stays as is, the rest is numeric.
        ## inherits()/is.character() avoid a length > 1 condition in `if`.
        data[[the_file]] <- data.frame(
            lapply(
                data[[the_file]],
                function(x) {
                    if (inherits(x, "character.item")) {
                        as.character(x)
                    } else if (is.character(x)) {
                        x
                    } else {
                        as.numeric(x)
                    }
                }
            )
        )
    }
    ## BENEFITED
    if (!is_csv) {
        ## Only the original columns are matched against `descriptions`
        ## ([orig_names]), which keeps both vectors the same length.
        is_vb <- grepl("VB", names(data[[the_file]])[orig_names])
        is_so_far <- grepl("So far", descriptions)
        is_benefited <- grepl("benefited from", descriptions)
        is_negative <- grepl("negatively affected", descriptions)
        data[[the_file]]$helped <- as.character(data[[the_file]][, which(is_benefited & is_so_far & !is_vb)])
        data[[the_file]]$hurt <- as.character(data[[the_file]][, which(is_negative & is_so_far & !is_vb)])
        data[[the_file]]$helped_vb <- as.character(data[[the_file]][, which(is_benefited & is_vb)])
        data[[the_file]]$hurt_vb <- as.character(data[[the_file]][, which(is_negative & is_vb)])
    } else {
        ## the CSV file has no descriptions; its items are addressed by name
        data[[the_file]]$helped <- as.character(data[[the_file]]$Q8)
        data[[the_file]]$hurt <- as.character(data[[the_file]]$Q10)
        data[[the_file]]$helped_vb <- as.character(data[[the_file]]$Q9VB)
        data[[the_file]]$hurt_vb <- as.character(data[[the_file]]$Q11VB)
    }
    ## keep only the harmonized columns
    keep_cols <- c("DATE","WEIGHT","PSRAID","survey","helped","hurt","helped_vb","hurt_vb")
    data[[the_file]] <- data[[the_file]][, keep_cols]
}
## Stack the per-file data frames into one; the source file name of each row
## is available in the `.id` column used below.
data <- ldply(data, data.frame)

## DATE was stored as text inside the loop; turn it back into a Date.
data[["DATE"]] <- as.Date(data[["DATE"]])

## Manual date corrections for individual surveys. The last assignment
## overwrites every hni108.por date, including the ones set by the first.
data$DATE[is.na(data$DATE) & data$.id == "hni108.por"] <- "2012-03-01" # leap year
data$DATE[data$.id == "hni133.por"] <- "2014-08-31" # spillover
data$DATE[data$.id == "hni108.por"] <- "2012-03-01" # spillover

## Drop the files listed here and keep everything else for the analysis.
is_excluded <- data$.id %in% c(
    "hni123.por",
    "p2013hni119.por",
    "p2014hni133.por",
    "hni083(2).por"
)
kff_helped_hurt <- data[!is_excluded, ]
rm(is_excluded)

kff_helped_hurt[["NUMBER"]] <- kff_helped_hurt[["survey"]]

## missing: 110, 115, 123 (missing IDs), 156 and later
##

## Monthly shares: each DATE is collapsed to the first day of its month and the
## mean of the logical indicator is taken within each month.
## Share answering "1" on the hurt item.
agg_hurt <- aggregate(
    hurt == "1" ~ as.Date(paste0(substr(DATE, 1, 7), "-01")),
    data = kff_helped_hurt,
    FUN = mean
)
## Share answering "1" on the helped item.
agg_helped <- aggregate(
    helped == "1" ~ as.Date(paste0(substr(DATE, 1, 7), "-01")),
    data = kff_helped_hurt,
    FUN = mean
)
## Share answering "1" on both items.
agg_hurt_helped <- aggregate(
    helped == "1" & hurt == "1" ~ as.Date(paste0(substr(DATE, 1, 7), "-01")),
    data = kff_helped_hurt,
    FUN = mean
)

## Figure 1: monthly share hurt, helped, and both, written to a PDF under
## <the_prefix>/figs.
pdf(
    paste0(
        the_prefix,"/figs/figure1_helped_hurt_gave_response_20200113.pdf"),
    width = 4.5, height = 3.5)
##
par(mar = c(4, 4.2, 4.5, 3))
## the first series sets up the axes (y fixed to 0-0.3) and the title
plot(agg_hurt, type = "l", ylim = c(0, 0.3), col = "darkorange", bty = "n",
     xlab = "Date", ylab = "Proportion helped and/or hurt", lwd = 3,
     main = "Hurt or Helped by the ACA",
     cex.lab = 1.4, cex.main = 1, cex.axis = 1.2)
points(agg_hurt, col = "darkorange", pch = 16)
## ylim is not a graphical parameter for lines() and only produced a warning;
## the axis range is already fixed by plot() above
lines(agg_helped, col = "purple", lwd = 3)
points(agg_helped, col = "purple", pch = 16)
lines(agg_hurt_helped, col = "green3", pch = 16)
points(agg_hurt_helped, col = "green3", pch = 16, cex = 0.8)
## series labels drawn inside the plot region
text(x = as.Date("2012-06-01"), y = 0.25, labels = "Hurt", col = "darkorange", cex = 1.2)
text(x = as.Date("2012-06-01"), y = 0.1, labels = "Helped", col = "purple", cex = 1.2)
text(x = as.Date("2012-06-01"), y = 0.025, labels = "Both helped and hurt", col = "green3", cex = 1.2)
## vertical reference lines with their labels in the top margin
abline(v = as.Date("2013-10-01"), lty = 1, lwd = 2, col = "black")
## mtext(text = "healthcare.gov opens + \nplan notices/cancellations", at = as.Date("2013-10-01"), side = 3, cex = 0.8)
mtext(text = "Oct '13", at = as.Date("2013-10-01"), side = 3, cex = 1, padj = -1.2, font = 2)
mtext(text = "Jan '14", at = as.Date("2014-01-01"), side = 3, cex = 1, font = 2)
abline(v = as.Date("2014-01-01"), lty = 1, lwd = 2, col = "black")
## dashed horizontal lines: mean of the monthly shares before 2012-01-01
mean_hurt <- mean(agg_hurt[agg_hurt[, 1] < as.Date("2012-01-01"), 2])
mean_helped <- mean(agg_helped[agg_helped[, 1] < as.Date("2012-01-01"), 2])
abline(h = mean_hurt, col = "darkorange", lty = 2, lwd = 2)
abline(h = mean_helped, col = "purple", lty = 2, lwd = 2)
mtext(text = "Avg '10/'11", at = mean_hurt, side = 4, cex = 1, las = 2, adj = 0.35, padj = -0.3, col = "darkorange")
mtext(text = "Avg '10/'11", at = mean_helped, side = 4, cex = 1, las = 2, adj = 0.35, padj = 1, col = "purple")
## abline(h = mean(agg_hurt_helped[agg_hurt_helped[,1] < "2012-01-01",2]))
## legend("topleft", legend=c("hurt","helped"), col=c("red","blue"), lty=1, bty="n")
dev.off()
